/**************************************/
/*Inferring industry classification codes in the workplace data*/
/*************************************/

/*The logic of the code is the following:
- First, we constructed a consistent series of SNI92 and SNI2002 codes in the following way :
	(1) We kept and copied available SNI 2002 codes between 1990 and 2007 to obtain the same SNI2002 codes across time.
	(2) If no SNI 2002 is available, we rely on an approximation. To this end, we matched the workplace datafile with correspondence_92_2002.dta file that contains an approximation of unique SNI 92 - SNI 2002 pairs, based on Statistics Sweden's keys. As the SNI92 and SNI2002 systems are quite close, we kept unique matches, and retained the first records when there was no unique mapping. E.g., when 3 different SNI2002 values correspond to one SNI92, the first SNI2002 value was retained.  

Remark: We believe it is easier to start this way as the conversion between SNI 92 and SNI 2002 is more certain

- Correspondence between SNI 2002 and SNI 2007:
	(3) We retained and copied available SNI 2007 codes back to 1990. 
	(4) We merge the workplace data with the correspondence_2002_2007.dta file to infer the industry classification under SNI 2007 for those installations that do not have such codes (the file contains the conversion between the SNI 2002 and SNI 2007 systems). */

	
*cd "/RFS_replication_package/Martinsson_et_al_datasets"

* Load the workplace-level data provided by Statistics Sweden.
use "arbetsstallen.dta", clear

* bidnr is the firm-level unique identifier; cfarnr_lopnr identifies workplaces.
sort bidnr cfarnr_lopnr year, stable

* ast_sni92 is the SNI 92 code in the workplace-level data (Sw: arbetsstalle).
* It is renamed to sni92 because that is the key used for the merge below.
rename ast_sni92 sni92

* Attach the inferred SNI 92 -> SNI 2002 correspondence so that a workplace's
* SNI 2002 code can be approximated when no observed code is available.
* keep(master match) discards rows that exist only in the correspondence table,
* and nogenerate avoids leaving a _merge variable behind.
merge m:1 sni92 using correspondence_92_2002_20231014.dta, keep(master match) nogenerate

* The correspondence table supplies an inferred SNI 2002 code in "sni2002",
* while the code observed in the workplace data is stored in "ast_sni2002".
* Whenever an observed code is present (neither empty nor "00000", which this
* script treats like a missing value) it overrides the inferred code, so that
* "sni2002" holds the observed code where available and the inferred one
* otherwise.
* A single vectorised -replace- performs the same row-wise assignment that was
* previously done with one -replace ... in i- per observation.
replace sni2002 = ast_sni2002 if !inlist(ast_sni2002, "", "00000")

* year2 = -year, so an ascending sort on year2 lists each workplace's most
* recent year first. This ordering is used to carry codes back to earlier years.
generate year2 = -year

sort bidnr cfarnr_lopnr year2, stable
* Harmonise the observed SNI 2002 codes within each workplace.
* The data are sorted by bidnr cfarnr_lopnr year2 (most recent year first), so
* [_n-1] refers to the next later year of the same workplace. An observed code
* is overwritten by the code of the later year when both are non-empty and
* differ. -replace- works through the observations in order, so a value that
* has just been written is the one compared against the following row, and the
* most recent code cascades back through consecutive non-empty years.
*
* "00000" is not allowed to act as the source value: the rest of this script
* treats it like a missing code, and letting it cascade would wipe out valid
* codes in earlier years. A row holding "00000" can still be overwritten by a
* valid later code.
*
* NOTE(review): sni2002 was filled from ast_sni2002 before this step, so the
* harmonised values are not reflected in sni2002 - confirm this is intended.
by bidnr cfarnr_lopnr: replace ast_sni2002 = ast_sni2002[_n-1] ///
    if _n > 1 ///
     & !inlist(ast_sni2002[_n-1], "", "00000") ///
     & ast_sni2002 != "" ///
     & ast_sni2002 != ast_sni2002[_n-1]


/* Obtaining SNI 2007 codes */

* ---------------------------------------------------------------------------
* Build sni2007_1, the only SNI 2007 variable this section leaves in the data.
* Sources, in order of priority:
*   1. the code observed in the workplace data (ast_sni2007), copied to the
*      other years of the same workplace;
*   2. the SNI 2002 -> SNI 2007 correspondence table (key: sni2002);
*   3. the SNI 92   -> SNI 2007 correspondence table (key: sni92).
* A lower-priority source only fills observations that are still empty, so an
* existing code is never erased.
*
* NOTE(review): the earlier version of this section assigned the empty
* observed code to sni2007_1 (replace sni2007_1 = sni2007 if sni2007 == "" ...),
* which blanked the table-based code exactly where it was needed, and it then
* dropped the observed codes. The priority order between the two tables is
* inferred from the comments in this file - please confirm.
* ---------------------------------------------------------------------------

* --- 1. Observed codes -------------------------------------------------------
* ast_sni2007 itself is left untouched; "00000" is treated as missing.
clonevar sni2007_1 = ast_sni2007
replace sni2007_1 = "" if sni2007_1 == "00000"

* Descending years: within a workplace, an empty year takes the code of the
* next later year. -replace- runs through the rows in order, so the code
* cascades across consecutive empty years.
sort bidnr cfarnr_lopnr year2, stable
by bidnr cfarnr_lopnr: replace sni2007_1 = sni2007_1[_n-1] ///
    if _n > 1 & sni2007_1 == "" & sni2007_1[_n-1] != ""

* Ascending years: remaining gaps take the code of the previous year.
sort bidnr cfarnr_lopnr year, stable
by bidnr cfarnr_lopnr: replace sni2007_1 = sni2007_1[_n-1] ///
    if _n > 1 & sni2007_1 == "" & sni2007_1[_n-1] != ""

drop year2

* --- 2. SNI 2002 -> SNI 2007 table -------------------------------------------
* The table's code arrives in the variable sni2007. Rows that exist only in
* the table are not kept, and no _merge variable is created.
merge m:1 sni2002 using correspondence_2002_2007_20231014.dta, keep(master match) nogenerate
replace sni2007_1 = sni2007 if sni2007_1 == "" & sni2007 != ""
drop sni2007

* --- 3. SNI 92 -> SNI 2007 table ---------------------------------------------
* Assumes this table also stores its code in a variable named sni2007 -
* TODO confirm against the file.
merge m:1 sni92 using correspondence_92_2007_20231014.dta, keep(master match) nogenerate
replace sni2007_1 = sni2007 if sni2007_1 == "" & sni2007 != ""
drop sni2007

 /*Merge with emission*/
 
* Keep only the workplace-years whose firm-year (bidnr, year) is also present
* in the emissions file; keep(match) retains matched rows only and nogenerate
* leaves no _merge variable behind.
merge m:1 bidnr year using emissions_basic.dta, keep(match) nogenerate
sort bidnr year, stable

* Same restriction with respect to the firm-characteristics file.
merge m:1 bidnr year using merge_UC_Serrano_firm_characteristics.dta, keep(match) nogenerate

* Restore the firm / workplace / year ordering before saving.
sort bidnr cfarnr_lopnr year, stable

* The resulting sample is saved as arbetstallen_emission_firm_characteristics.dta.
save arbetstallen_emission_firm_characteristics, replace 


 